In [60]:
%matplotlib inline

#import envoy 
import json
import pymongo 
from bson import json_util # From  pymongo
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as md
from datetime import datetime
print "imported"


imported

In [61]:
# Set up parallel computing: connect to the IPython.parallel cluster and
# print the engine ids so the number of available engines can be confirmed.

from IPython import parallel

# Make sure to enter the correct profile: the cluster must have been started
# under this same profile name or the client will not find its engines.
clients = parallel.Client(profile='nbserver')

# Use synchronous (blocking) computations: calls wait until every engine has
# finished before returning their results.
clients.block = True  
# A direct view over the client's engines; used later for scatter/execute/gather.
dview = clients.direct_view()
print clients.ids


[0, 1, 2, 3, 4, 5]

In [62]:
mdb = pymongo.MongoClient()
db = mdb.ferguson
tweets = db.tweets
# twtsm = db.tweets_small # if you only want to work with the smaller sample

_start = datetime.now()
print "Number of tweets"
print tweets.count()
# print twtsm.count() # smaller sample
_end = datetime.now()
_diff = _start - _end
print "Query took {} seconds".format((_diff.microseconds)/1000000.0)


Number of tweets
15125411
Query took 0.999099 seconds

In [63]:
_start = datetime.now()
# user = twtsm.find({"user.id": 718319754}) # for small sample since it's already indexed by user ascending, dates descending
user = tweets.find({"user.id": 718319754})

print "Query took {} seconds".format((_diff.microseconds)/1000000.0)
_end = datetime.now()
_diff = _start - _end


Query took 0.999099 seconds

In [64]:
# Turn the cursor into a list so the tweets can be iterated repeatedly by the
# cells below (a cursor can only be consumed once).
hotnostrils = list(user)

# Number of tweets retrieved for this user.
len(hotnostrils)


Out[64]:
4110

Followers/Friends, Start/End


In [65]:
# warning: very hacky.  not generalizable.  i know ahead of time the that tweets are sorted by date because of query
userinfo = [0] * len(hotnostrils)
counter = 0

for tweet in hotnostrils:
#     print tweet["created_at"]
#     print tweet["created_at"], tweet["user"]["followers_count"], tweet["user"]["friends_count"]

    userinfo[counter] = (tweet["created_at"], tweet["user"]["followers_count"], tweet["user"]["friends_count"])
    counter += 1
    

print "Start: ", userinfo[-1][0]
print "Followers: ", userinfo[-1][1]
print "Friends: ", userinfo[-1][2]
print ""
print "End: ", userinfo[0][0]
print "Followers: ", userinfo[0][1]
print "Friends: ", userinfo[0][2]


Start:  Sat Nov 15 17:22:43 +0000 2014
Followers:  845
Friends:  679

End:  Mon Nov 24 22:43:20 +0000 2014
Followers:  951
Friends:  749

In [67]:
# Peek at the created_at value of the last collected record; it is a raw
# string, not a datetime, so it still has to be parsed before plotting.
userinfo[-1][0]


Out[67]:
u'Sat Nov 15 17:22:43 +0000 2014'

In [95]:
# Tabulate the per-tweet (timestamp, followers, friends) records.
ffcount_columns = ["created_at", "followers", "friends"]
ffcount = pd.DataFrame(data=userinfo, columns=ffcount_columns)
ffcount.head()


Out[95]:
created_at followers friends
0 Mon Nov 24 22:43:20 +0000 2014 951 749
1 Tue Nov 18 16:31:29 +0000 2014 852 685
2 Tue Nov 25 17:16:22 +0000 2014 973 766
3 Tue Nov 18 16:28:21 +0000 2014 852 685
4 Tue Nov 18 23:55:12 +0000 2014 854 684

In [96]:
# Parse the created_at strings into datetimes on the cluster engines and time
# the whole round trip (scatter -> execute -> gather -> concat).
_start = datetime.now()

# pandas must be imported on every engine before engine-side code can use `pd`.
%px import pandas as pd
# Scatter the dataframe across the engines under the name 'ffcount'.  If four
# engines are used, each engine receives roughly 1/4 of the rows.
dview.scatter('ffcount', ffcount)

# Each engine adds a parsed 'date' column to its own partition.
dview.execute("ffcount['date'] = pd.to_datetime(ffcount['created_at'])")
# dview.execute('ffcount["month"] = map(lambda x: x.month, ffcount.date)')
# dview.execute('ffcount["day"] = map(lambda x: x.day, ffcount.date)')
# dview.execute('ffcount["hour"] = map(lambda x:x.hour, ffcount.date)')
# dview.execute('ffcount["minute"] = map(lambda x:x.minute, ffcount.date)')
# Pull the partitions back from the engines; presumably a list of per-engine
# frames, since the result is handed straight to pd.concat -- TODO confirm.
list_p = dview.gather('ffcount')
hotnos = pd.concat(list_p)

_end = datetime.now()
_diff = _end - _start
print "Preparing dataframe with parallel computing took {} seconds".format(_diff.total_seconds())


Preparing dataframe with parallel computing took 0.23231 seconds

In [97]:
# Preview the gathered frame to confirm the parsed 'date' column is present.
hotnos.head()


Out[97]:
created_at followers friends date
0 Mon Nov 24 22:43:20 +0000 2014 951 749 2014-11-24 22:43:20
1 Tue Nov 18 16:31:29 +0000 2014 852 685 2014-11-18 16:31:29
2 Tue Nov 25 17:16:22 +0000 2014 973 766 2014-11-25 17:16:22
3 Tue Nov 18 16:28:21 +0000 2014 852 685 2014-11-18 16:28:21
4 Tue Nov 18 23:55:12 +0000 2014 854 684 2014-11-18 23:55:12

In [98]:
# Order the rows chronologically, in place, so the time-series plot below
# draws its lines in date order.
# NOTE(review): DataFrame.sort was deprecated in pandas 0.17 and removed in
# 0.20; on a newer pandas this call must become sort_values("date").
hotnos.sort("date", inplace=True)
hotnos.head()


Out[98]:
created_at followers friends date
1656 Tue Nov 11 22:17:47 +0000 2014 844 680 2014-11-11 22:17:47
3107 Tue Nov 11 22:24:14 +0000 2014 844 680 2014-11-11 22:24:14
1649 Tue Nov 11 22:33:51 +0000 2014 844 680 2014-11-11 22:33:51
3106 Tue Nov 11 22:36:03 +0000 2014 844 680 2014-11-11 22:36:03
1648 Tue Nov 11 22:39:05 +0000 2014 844 680 2014-11-11 22:39:05

In [ ]:
# Line plot of the user's follower and friend counts over time.
# Expects `hotnos` to be sorted by date already; otherwise the lines zig-zag.

# Convert the timestamps to matplotlib date numbers once and reuse them for
# both series instead of recomputing the conversion per plot call.
dates_num = md.date2num(hotnos["date"])

plt.figure(figsize=(20, 8))
plt.plot_date(x=dates_num, y=hotnos["followers"], linestyle="-", marker=None, color="r", label="followers")
plt.plot_date(x=dates_num, y=hotnos["friends"], linestyle="-", marker=None, color="b", label="friends")
# Title and axis labels so the figure can be read on its own.
plt.title("Followers and friends over time")
plt.xlabel("Tweet time (UTC)")
plt.ylabel("Count")
plt.legend()
plt.show()


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-121-2f0d6bc55585> in <module>()
      1 plt.figure(figsize=(20, 8))
----> 2 plt.plot_date(x=date2num(hotnos["date"]), y=hotnos["followers"],  linestyle="-", marker=None, color="r", label="followers")
      3 plt.plot_date(x=date2num(hotnos["date"]), y=hotnos["friends"], linestyle="-", marker=None, color="b", label="friends")
      4 plt.legend()
      5 plt.show()

NameError: name 'date2num' is not defined
<matplotlib.figure.Figure at 0x7faf43760950>

In [80]:
# default dict info from http://evanmuehlhausen.com/simple-counters-in-python-with-benchmarks/
from collections import defaultdict

# orginal recipe tags
otags = defaultdict(int)

# dict for retweet tags counts
rtags = defaultdict(int)

# counter for the number of orginal tweets from thie user
original_count = 0

# go through tweets of the user
for tweet in hotnostrils:
    # get the tags
    tags = [hashtags["text"] for hashtags in tweet["entities"]["hashtags"]]
    
    # update the dict that is keeping count of the tag fq.
    for tag in tags:
        if "retweeted_status" in tweet:
            rtags[tag] += 1
        else:
            otags[tag] += 1
            original_count += 1

print "Go on..."


Go on...

Hashtag counts from both original and retweets


In [120]:
# operator info: http://stackoverflow.com/questions/613183/sort-a-python-dictionary-by-value
import operator

sorted_otags = sorted(otags.items(), key = operator.itemgetter(1))
sorted_rtags = sorted(rtags.items(), key = operator.itemgetter(1))

sorted_otags.reverse()
sorted_rtags.reverse()

print "User: @hotnostrilsrfun"
print ""

print "Original tweets: ", original_count
print "Retweets: ", len(hotnostrils) - original_count
print ""

print "Original Tweet Hashtags"

for tag in sorted_otags:
    print tag[0], ": ", tag[1]

print ""
    
print "Retweet Hashtags"

for tag in sorted_rtags[:25]:
    print tag[0], ": ", tag[1]


User: @hotnostrilsrfun

Original tweets:  1012
Retweets:  3098

Original Tweet Hashtags
Ferguson :  853
ferguson :  139
JusticeForZemir :  4
hilarity :  1
OathKeepers :  1
StopProfilingPolice :  1
speedbumps :  1
MikeBrown :  1
GetAClue :  1
SupportPolice :  1
dramaqueens :  1
jodiArias :  1
fuckedUp :  1
StLouis :  1
ThugMom :  1
shaw :  1
HighAsHell :  1
FERGUSON :  1
delusional :  1

Retweet Hashtags
Ferguson :  2653
ferguson :  389
tcot :  94
MikeBrown :  90
DarrenWilson :  66
MichaelBrown :  41
CNN :  24
FergusonDecision :  24
StLouis :  19
STL :  19
Obama :  17
FERGUSON :  17
mikebrown :  15
BlackLivesMatter :  15
ViolenceWillNotBeTolerated :  14
shaunking :  14
stl :  13
cnn :  12
JusticeForZemir :  12
tlot :  11
RedNationRising :  10
ccot :  10
pjnet :  9
HandsUpDontShoot :  8
MSNBC :  8

In [114]:
# Put the 25 most frequent retweet hashtags into a frame for plotting.
top_retweet_tags = sorted_rtags[:25]
rtg = pd.DataFrame(data=top_retweet_tags, columns=["hashtag", "num"])
rtg.head()


Out[114]:
hashtag num
0 Ferguson 2653
1 ferguson 389
2 tcot 94
3 MikeBrown 90
4 DarrenWilson 66

In [119]:
# Bar chart of the most frequent retweet hashtags, one bar per hashtag.
x = np.arange(len(rtg))

plt.figure(figsize=(20, 4))
# Centre each bar on its x position explicitly.  The default alignment differs
# between matplotlib versions (edge before 2.0, center afterwards), and the
# tick labels below are placed at x, so relying on the default either shifts
# the labels off the bars or clips the first bar.
plt.bar(x, rtg["num"], align="center")
plt.xticks(x, rtg["hashtag"], rotation=90)
# Half a slot of padding on each side keeps the first and last bars fully visible.
plt.xlim(-0.5, len(rtg) - 0.5)
# Title and axis labels so the figure can be read on its own.
plt.title("Most frequent hashtags in retweets")
plt.xlabel("Hashtag")
plt.ylabel("Number of retweets using the hashtag")
plt.show()


Text of original tweets


In [31]:
# # NOTE: THIS IS FROM TWEETS_SMALL
# for tweet in hotnostrils:
#     if "retweeted_status" not in tweet:
#         print tweet["text"]


I used to think I was a liberal! Voted for Obama, TWICE! Never Again! I will be a republican until the day I die!  FUCK OBAMA! #Ferguson
@JaneQCitizen16 I so hope Bassem misses the announcement &amp; spends it behind bars! #Ferguson
So where's Bassem this glorious morning? Still in the pokey? #Ferguson
@KiriLanice @JTPuhlyak @zellieimani It's not white fear gun buyin, its determination. I'd do exactly the same. Fuck the Rioters #Ferguson
Would rather repeatedly jam a rusty fork in my left eyeball than read @deray newsletter. #Ferguson
Where I live, HARRASSING a police officer is a crime. Don't understand why it isn't in  #Ferguson
Shooter Florida university didn't follow directions, instead fired at police, shot dead. That's the way it works. Take Note #Ferguson
So did the police arrest any black people tonight? Bassem, white "pastor" and who else?  #Ferguson
And while I'm at it. SCREW the KKK and ANONYMOUS and the BLACK PANTHERS and the RIOTERS! #Ferguson
Protesters can call themselves peaceful all they want. No one is convinced. Everyone knows the truth.  #Ferguson
I like to start a fund:  Bassem a one way ticket to Palestine.  Where he can talk shit &amp; throw rocks, while hiding behind a child #Ferguson
http://t.co/fxuKyTYl0B #Ferguson
#Ferguson Police again showed protesters whose street! (Really slow learners)
Law Enforcement  SHUT BASSEMS SHIT DOWN TONIGHT in #Ferguson
@SheeitDawg @aroadwarrior1 I think he's a professional snitch that's trying hard to parlay the exposure in #Ferguson into a media gig.
Wouldn't it be FANTASTIC if Bassem missed the Grand Jury decision? Had to spend the next week or so behind bars! 😂#Ferguson
So why aren't protesters begging for bail money? Did Al Sharpton or lard ass Rosie O'donnel  donate $? LMAO! #Ferguson
It's probably a lie that Bassems phone was smashed, just another ploy to beg for $$ #Ferguson
Passing this on. Hope you can arrest this one! @stlcountypd #Ferguson http://t.co/piYdJy9IsG
Law Enforcement did an exceptional job tonight! Thank you! @stlcountypd   #Ferguson
@CassandraRules noooooo keep Bassem locked up! #Ferguson
Peaceful? Pffft!  Walk up to any protester and speak to them the way they do the police. Then try to convince them it's peaceful. #Ferguson
Stupid mother fuckers still don't understand it's dangerous to play in the street. Even after protester has foot run over! SMH  #Ferguson
@TallyAnnaE @melaninbarbie Just checking to see if the protesters learned. They did, got out of the street lightening fast! #Ferguson
More convinced than ever that Bassem is an informant. #Ferguson
@CinnaAyn you know blocking the street traffic is illegal, right? Don't whine when arrested. #Ferguson
@CinnaAyn Bassem is on his way. Not sure that's the kinda "help" you need. #1 agitator. #Ferguson
@CinnaAyn Why do peaceful protesters need "backup" at #Ferguson PD?
@AurielEbonie Why do peaceful protesters need backup at #Ferguson PD?
@ArrestWilson distance between Brown &amp; SUV irrelevant. important is distance between Wilson &amp; Brown at the end. Less than 20ft #Ferguson
I find it rather amusing that those protesters spewing hate whitey &amp; ignorance have a problem with the KKK being equally stupid!  #Ferguson
@rickybird11811 @Pharma_Trader  I haven't watched since Whoopi said Polanski didn't "rape rape" that 14 yr old he drugged.  #Ferguson
@bassem_masri @PattiLongworth Weren't you also cheering when the liars claimed at first a cop was killed? #Ferguson
Being educated, law abiding &amp; productive citizen isn't a white thing. It's being a civilized member of society. #Ferguson
@PinxJinx @lil_zoo314 I would bet DORIAN JOHNSON did too, especially w/felony murder over his head. #Ferguson
@MiZzYouniqu3 @UnitedFrontRev @Ms_TeeWiz Right! I'm a woman! Shopping is in my blood! I'll shop wherever &amp; whenever I like! #Ferguson
@UnitedFrontRev @Ms_TeeWiz @MiZzYouniqu3 You don't invest in your community by holding it hostage ASSHOLE! #Ferguson
@UnitedFrontRev @Ms_TeeWiz @MiZzYouniqu3 I never shop on Black Friday, this year I'm going to make it a point to SHOP! #Ferguson
Is anyone shocked media witnesses sang different tune, under oath, before the grand jury? I'm not!  #Ferguson
When are they going to arrest gang banging, pipe toting, THUG MOM?  #Ferguson
@stackizshort STAY WOKE? You sound every bit the idiot you are. #Ferguson
Those LYING in the media deserve to be thrown to the protesters! PUBLISH their names when GJ evidence is released!  #Ferguson
The LYING MEDIA WHORES that started the BIG LIE that is  #Ferguson don't deserve to have names redacted from their truthful GJ testimony
@outintherain84 @CNN I agree the LYING MEDIA WHORES that started the BIG LIE that is #Ferguson needs their names PUBLISHED!
MUST READ  http://t.co/T58O7CLqmz
Witnesses sing different tune before GJ than in the media! http://t.co/cB5E2j3b4W DUH! Ghost Dorian
@nedmiller Shhhh the correction officer rapist was black. Protesters don't want it mentioned;) #Ferguson
Would be hilarious if after GJ announcement nothing happened. Media &amp; law enforcement would look so silly. Protesters respected in #Ferguson
Sandwich shooter shot down trying to MURDER a cop  http://t.co/0GnQzZ6iqc #Ferguson
Sandwich shooter.  http://t.co/0GnQzZ6iqc #Ferguson
@CNN  http://t.co/iFgWq1LAS5 #Ferguson
People protesting in #Ferguson joined up for the same reason people join street gangs. "Family" they never had.
Only person that put no value on Mike Browns life, was Mike Brown. He threw his away. ThugLife=NoLife  #Ferguson
@tcaLdarez @doggy_yuck All protesters are to blame when they shield &amp; invite the violent ones. #Ferguson
@crystal_faith88 Is there something happening in #Ferguson ? lol
@shawna2727 @akacharleswade If Brown could have been your child, I urge you to take parenting classes. #Ferguson
Just not seeing anything to get worked up about. Looks like a loser that plays dress up in moms basement #Ferguson http://t.co/v7Dw84hhuc
@WayneDupreeShow Surely, the police are behind the car theft &amp; burgled home of protesters! lol #Ferguson
Shocking! White neighbor tells her son's the same things I've taught mine about interacting with law enforcement! #Ferguson
My white neighbors are thankful their sons get speeding tickets, says hers needed to LEARN the hard way.  #Ferguson
Asked my white neighbors specifically  if their sons get stopped by police, answer, yes. Parents thankful for the extra eyes.  #Ferguson
Honey Boo Boo's mom is giving Mike Browns mom some stiff competition for Mother of the Year #Ferguson Style!